## Script for Mallinson, Daniel J., Goktug Morcol, Eunsil Yoo, 
## Faisal Shahinshah, Eli Levine, and Saahir Shafi. (2020). 
## "Sharing Economy: A Systematic and Thematic Analysis of the 
## Literature" Information Polity
## 2020-03-05

# NOTE: the workspace is deliberately NOT cleared here. Every object used in
# the analysis below is assigned before it is read, so the results do not
# depend on an empty workspace. Wiping it would only destroy unrelated objects
# when this file is source()d into an existing session, and it would not
# detach packages or reset options anyway. For a fully clean replication,
# run the script in a fresh R session.

library(foreign)
library(ngram)

# Load the article dataset from the working directory. `raw.data` keeps the
# file exactly as read; `data` is the copy that the analysis modifies.
raw.data <- read.csv(file = "Sharing economy dataset for analysis.csv")
data <- raw.data

# Constant column of 1s, so that summing it within a group gives a row count.
data[["counter"]] <- 1

################# Yearly Publishing Trend (Figure 1) #######################
# Publications per year: total the unit counter within each value of `year`.
year.counts <- setNames(
  aggregate(data$counter, by = list(data$year), FUN = sum),
  c("year", "count")
)

# Scaffold with one row for every year from 2006 through 2018, so that years
# without any publication still appear in the series.
trend <- data.frame(year = seq(from = 2006, to = 2018, by = 1))

# Left join the observed counts onto the scaffold. Years with no match come
# back as NA and are recoded to zero.
trend <- merge(x = trend, y = year.counts, by = "year", all.x = TRUE)
trend$count <- replace(trend$count, is.na(trend$count), 0)

# Replace the data frame with an annual (frequency 1) series for 2006-2018.
trend <- ts(data = trend$count, start = 2006, end = 2018, frequency = 1)

# Draw the yearly trend to a 6 x 6 inch TIFF at 600 dpi.
# Disabled EPS alternative:
#   setEPS()
#   postscript("fig1.eps")
# NOTE(review): the file is written as "fig2.tiff" although this section is
# labelled Figure 1 -- confirm which figure number is intended.
tiff(filename = "fig2.tiff", width = 6, height = 6, units = "in", res = 600)
plot.new()
par(mar = c(4, 4, 1, 1))
# The y range extends below zero to leave room for the per-year totals that
# are printed underneath the line.
plot.window(xlim = c(2006, 2018), ylim = c(-35, 260))
lines(x = trend, lwd = 2)
axis(side = 1, at = seq(2006, 2018, by = 2), labels = seq(2006, 2018, by = 2))
axis(side = 2, at = seq(0, 250, by = 50), labels = seq(0, 250, by = 50), las = 2)
title(ylab = "Count of Publications")
# Row caption, then the count for each year, both placed below y = 0.
text(x = 2012, y = -5, labels = "Total Articles", pos = 1, cex = 0.8)
text(x = seq(2006, 2018, by = 1), y = -30, labels = trend)
dev.off()

################# Publishing Disciplines (Table 1) #######################
# Results are shown by top-level auto-printing, so each line stays a bare
# expression.
table(data$pubtype) # Number of articles in each discipline
margin.table(table(data$pubtype)) # Grand total over all disciplines
100 * prop.table(table(data$pubtype)) # Share of each discipline, in percent

############### Concepts by Discipline ###########################
table(data$ConceptsUsed) # Tabulate every distinct ConceptsUsed entry

# ConceptsUsed entries listed discipline by discipline. For some disciplines
# the number of abstracts containing a given word stem is counted as well.
# NOTE(review): the stems are matched case-sensitively, so a capitalised
# occurrence (e.g. at the start of a sentence) is not counted -- confirm this
# is intended.
with(data, ConceptsUsed[pubtype == "Business"])
sum(grepl("disrupt", with(data, Abstract[pubtype == "Business"]))) # "disrupt" in Business
sum(grepl("collaborat", with(data, Abstract[pubtype == "Business"])))
sum(grepl("sustain", with(data, Abstract[pubtype == "Business"])))
with(data, ConceptsUsed[pubtype == "Technology"])
with(data, ConceptsUsed[pubtype == "Economics"])
with(data, ConceptsUsed[pubtype == "Environmental"])
sum(grepl("sustain", with(data, Abstract[pubtype == "Environmental"])))
with(data, ConceptsUsed[pubtype == "Tourism"])
sum(grepl("trust", with(data, Abstract[pubtype == "Tourism"])))
with(data, ConceptsUsed[pubtype == "Law Review"])
with(data, ConceptsUsed[pubtype == "Sociology"])
with(data, ConceptsUsed[pubtype == "Labor"])
with(data, ConceptsUsed[pubtype == "Political Science"])
with(data, ConceptsUsed[pubtype == "Public Administration"])
with(data, ConceptsUsed[pubtype == "Public Policy"])

# The same stem counts over all abstracts, regardless of discipline.
sum(grepl("disrupt", data$Abstract)) # "disrupt" in any abstract
sum(grepl("collaborat", data$Abstract))
sum(grepl("regulat", data$Abstract))

################# Platforms (Table 3) #######################
table(data$area_clean) # Number of articles for each platform type
margin.table(table(data$area_clean)) # Grand total over all platform types
100 * prop.table(table(data$area_clean)) # Share of each platform type, in percent

################# Countries (Figure 3) #######################
table(data$country_clean) # Number of articles for each country
margin.table(table(data$country_clean)) # Grand total over all countries
100 * prop.table(table(data$country_clean)) # Share of each country, in percent

############### Theory by Discipline (Table 4) ###########################
# Print the theorybase_clean entries of the articles in each discipline, one
# discipline per line.
with(data, theorybase_clean[pubtype == "Business"])
with(data, theorybase_clean[pubtype == "Technology"])
with(data, theorybase_clean[pubtype == "Economics"])
with(data, theorybase_clean[pubtype == "Environmental"])
with(data, theorybase_clean[pubtype == "Tourism"])
with(data, theorybase_clean[pubtype == "Law Review"])
with(data, theorybase_clean[pubtype == "Sociology"])
with(data, theorybase_clean[pubtype == "Labor"])
with(data, theorybase_clean[pubtype == "Political Science"])
with(data, theorybase_clean[pubtype == "Public Administration"])
with(data, theorybase_clean[pubtype == "Public Policy"])

################# Methodology (Table 5) #######################
table(data$GeneralMethodology_clean) # Number of articles for each methodology
margin.table(table(data$GeneralMethodology_clean)) # Grand total over all methodologies
100 * prop.table(table(data$GeneralMethodology_clean)) # Share of each methodology, in percent

######### Methodology by Discipline ################
# For each discipline, in this order: methodology frequencies, the same as
# percentages, and the number of articles counted.
with(data, table(GeneralMethodology_clean[pubtype == "Business"]))
100 * with(data, prop.table(table(GeneralMethodology_clean[pubtype == "Business"])))
margin.table(with(data, table(GeneralMethodology_clean[pubtype == "Business"])))
with(data, table(GeneralMethodology_clean[pubtype == "Technology"]))
100 * with(data, prop.table(table(GeneralMethodology_clean[pubtype == "Technology"])))
margin.table(with(data, table(GeneralMethodology_clean[pubtype == "Technology"])))
with(data, table(GeneralMethodology_clean[pubtype == "Economics"]))
100 * with(data, prop.table(table(GeneralMethodology_clean[pubtype == "Economics"])))
margin.table(with(data, table(GeneralMethodology_clean[pubtype == "Economics"])))
with(data, table(GeneralMethodology_clean[pubtype == "Environmental"]))
100 * with(data, prop.table(table(GeneralMethodology_clean[pubtype == "Environmental"])))
margin.table(with(data, table(GeneralMethodology_clean[pubtype == "Environmental"])))
with(data, table(GeneralMethodology_clean[pubtype == "Tourism"]))
100 * with(data, prop.table(table(GeneralMethodology_clean[pubtype == "Tourism"])))
margin.table(with(data, table(GeneralMethodology_clean[pubtype == "Tourism"])))
with(data, table(GeneralMethodology_clean[pubtype == "Law Review"]))
100 * with(data, prop.table(table(GeneralMethodology_clean[pubtype == "Law Review"])))
margin.table(with(data, table(GeneralMethodology_clean[pubtype == "Law Review"])))

# Pool Political Science, Public Administration and Public Policy into one
# data frame, stacking the rows discipline by discipline in the order listed.
# which() leaves out rows whose pubtype is NA.
paffairs <- do.call(
  rbind,
  lapply(
    c("Political Science", "Public Administration", "Public Policy"),
    function(discipline) {
      data[which(data$pubtype == discipline), , drop = FALSE]
    }
  )
)

# Methodology frequencies, percentages, and article total for the pooled group.
table(paffairs$GeneralMethodology_clean)
100 * prop.table(table(paffairs$GeneralMethodology_clean))
margin.table(table(paffairs$GeneralMethodology_clean))


